Chapter 5 Community composition

load("data/data.Rdata")

5.1 Taxonomy overview

5.1.1 Stacked barplot

# Stacked barplot of per-sample relative abundances, with bars filled by phylum
# and panels nested by environment and longitude.
genome_counts_filt %>%
  mutate(across(-genome, ~ .x / sum(.x))) %>% # apply TSS normalisation: divide every non-genome column by its total
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% # reduce to minimum number of columns
  left_join(genome_metadata, by = join_by(genome == genome)) %>% # append genome metadata
  left_join(sample_metadata, by = join_by(sample == sample)) %>% # append sample metadata
  filter(count > 0) %>% # filter 0 counts
  ggplot(aes(x = sample, y = count, fill = phylum, group = phylum)) + # grouping enables keeping the same sorting of taxonomic units
    geom_bar(stat = "identity", colour = "white", linewidth = 0.1) + # plot stacked bars with white borders
    scale_fill_manual(values = phylum_colors) +
    facet_nested(. ~ environment + longitude, scales = "free") + # facet per environment, then longitude
    guides(fill = guide_legend(ncol = 1)) + # single-column legend
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
          axis.title.x = element_blank(),
          panel.background = element_blank(),
          panel.border = element_blank(),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black")) +
    labs(fill = "Phylum", y = "Relative abundance", x = "Samples")

5.1.2 Phylum relative abundances

# Relative abundance of each phylum in each sample: one row per
# sample-phylum pair, with the summed normalised counts in `relabun`.
phylum_summary <- genome_counts_filt %>%
  mutate(across(-genome, ~ .x / sum(.x))) %>% # apply TSS normalisation
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  left_join(sample_metadata, by = join_by(sample == sample)) %>%
  left_join(genome_metadata, by = join_by(genome == genome)) %>%
  group_by(sample, phylum) %>%
  summarise(relabun = sum(count), .groups = "drop") # drop grouping so later steps start from an ungrouped table

# Mean and standard deviation of each phylum's relative abundance across
# samples, sorted from highest to lowest mean and rendered with tt().
phylum_summary %>%
    group_by(phylum) %>%
    summarise(mean = mean(relabun, na.rm = TRUE), sd = sd(relabun, na.rm = TRUE)) %>%
    arrange(desc(mean)) %>%
    tt()
tinytable_ozs4hxe8te9wy06vn5ub
phylum mean sd
p__Bacteroidota 5.689285e-01 0.1540091282
p__Bacillota_A 1.743849e-01 0.0700812372
p__Pseudomonadota 1.113540e-01 0.1500626212
p__Verrucomicrobiota 6.181210e-02 0.0587656502
p__Bacillota 3.203183e-02 0.0403185065
p__Desulfobacterota 2.632570e-02 0.0270005049
p__Fusobacteriota 8.571358e-03 0.0145576339
p__Bacillota_C 6.103158e-03 0.0072686479
p__Deferribacterota 4.297397e-03 0.0057855372
p__Cyanobacteriota 2.749617e-03 0.0035676690
p__Bacillota_B 2.476044e-03 0.0022425682
p__Elusimicrobiota 8.931074e-04 0.0024994217
p__Chlamydiota 7.224535e-05 0.0001589365
# Character vector of phylum names, ordered by decreasing mean relative
# abundance; used below to order the y axis and pick the matching colours.
phylum_arrange <- phylum_summary %>%
    group_by(phylum) %>%
    summarise(mean = mean(relabun)) %>%
    arrange(desc(mean)) %>%
    pull(phylum)

# Jitter plot of per-sample relative abundances, one row per phylum.
# Factor levels are reversed so the phylum with the highest mean sits at the top.
phylum_summary %>%
    filter(phylum %in% phylum_arrange) %>%
    mutate(phylum = factor(phylum, levels = rev(phylum_arrange))) %>%
    ggplot(aes(x = relabun, y = phylum, group = phylum, color = phylum)) +
        geom_jitter(alpha = 0.5) +
        scale_color_manual(values = phylum_colors[rev(phylum_arrange)]) + # colours looked up by phylum name
        labs(x = "Relative abundance", y = "Phylum") +
        theme_minimal() +
        theme(legend.position = "none") # y axis already names each phylum

5.2 Taxonomy boxplot

5.2.1 Family

# Relative abundance of each family in each sample: one row per
# sample-family pair, with the summed normalised counts in `relabun`.
family_summary <- genome_counts_filt %>%
  mutate(across(-genome, ~ .x / sum(.x))) %>% # apply TSS normalisation
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% # reduce to minimum number of columns
  left_join(sample_metadata, by = join_by(sample == sample)) %>% # append sample metadata
  left_join(genome_metadata, by = join_by(genome == genome)) %>% # append genome metadata
  group_by(sample, family) %>%
  summarise(relabun = sum(count), .groups = "drop") # drop grouping so later steps start from an ungrouped table

# Mean and standard deviation of each family's relative abundance across
# samples, sorted from highest to lowest mean and rendered with tt().
family_summary %>%
    group_by(family) %>%
    summarise(mean = mean(relabun, na.rm = TRUE), sd = sd(relabun, na.rm = TRUE)) %>%
    arrange(desc(mean)) %>%
    tt()
tinytable_rfj6j08x5xxcp9t349g1
family mean sd
f__Bacteroidaceae 2.471960e-01 0.1325115215
f__Rikenellaceae 1.160874e-01 0.0684497647
f__Tannerellaceae 1.019561e-01 0.0569863869
f__Akkermansiaceae 6.066164e-02 0.0582934333
f__Marinifilaceae 5.988787e-02 0.0475335342
f__Lachnospiraceae 5.808688e-02 0.0415927321
f__Ruminococcaceae 5.115411e-02 0.0373298162
f__Enterobacteriaceae 4.838945e-02 0.1181156814
f__Aeromonadaceae 3.001583e-02 0.0460225099
f__Desulfovibrionaceae 2.632570e-02 0.0270005049
f__ 2.477546e-02 0.0212954773
f__Erysipelotrichaceae 1.773323e-02 0.0146642456
f__Clostridiaceae 1.667605e-02 0.0246251130
f__Mycoplasmoidaceae 1.307127e-02 0.0392437991
f__Moraxellaceae 1.269634e-02 0.0263873382
f__Oscillospiraceae 1.049036e-02 0.0073022058
f__Cellulosilyticaceae 9.674865e-03 0.0174362664
f__Muribaculaceae 9.275306e-03 0.0103502184
f__Fusobacteriaceae 8.571358e-03 0.0145576339
f__CHK158-818 7.802093e-03 0.0092742369
f__Anaerovoracaceae 6.418852e-03 0.0101906138
f__CAG-239 5.594649e-03 0.0085961651
f__Butyricicoccaceae 5.536744e-03 0.0153801549
f__Peptostreptococcaceae 4.616539e-03 0.0103902778
f__P3 4.487408e-03 0.0075825882
f__Mucispirillaceae 4.297397e-03 0.0057855372
f__Pseudomonadaceae 3.189040e-03 0.0060054779
f__Acutalibacteraceae 3.081339e-03 0.0045602908
f__UBA3637 2.817205e-03 0.0073111663
f__Gastranaerophilaceae 2.636235e-03 0.0035305519
f__UBA932 2.456666e-03 0.0033990559
f__Peptococcaceae 2.400061e-03 0.0022145205
f__Succinispiraceae 2.313402e-03 0.0024808879
f__Chromobacteriaceae 2.301141e-03 0.0099058815
f__Pumilibacteraceae 2.194988e-03 0.0025294376
f__Anaerotignaceae 2.153835e-03 0.0024871306
f__Massilibacillaceae 2.101814e-03 0.0044747006
f__Shewanellaceae 1.007172e-03 0.0039548332
f__Chitinibacteraceae 1.000738e-03 0.0024966652
f__UBA3830 9.957148e-04 0.0015639545
f__Xanthobacteraceae 9.310902e-04 0.0037227287
f__Coprobacillaceae 9.258861e-04 0.0021323691
f__Elusimicrobiaceae 8.931074e-04 0.0024994217
f__Burkholderiaceae_A 8.678700e-04 0.0024729975
f__UBA1997 7.055924e-04 0.0021616219
f__Coprobacteraceae 6.020864e-04 0.0009582824
f__Sedimentibacteraceae 5.766507e-04 0.0007547715
f__UBA1820 5.595296e-04 0.0008649613
f__CAG-508 4.668877e-04 0.0025549082
f__Eubacteriaceae 3.514209e-04 0.0006269121
f__Borkfalkiaceae 2.249938e-04 0.0004160226
f__CALYAR01 1.572887e-04 0.0002786771
f__Enterococcaceae 1.502679e-04 0.0008889969
f__CALVMC01 9.823090e-05 0.0005216160
f__UBA660 7.705595e-05 0.0001929114
f__UBA7702 7.598288e-05 0.0001804883
f__GCF-1484045 7.412128e-05 0.0004385074
f__Chlamydiaceae 7.224535e-05 0.0001589365
f__UBA3700 6.138625e-05 0.0003631659
# Character vector of family names, ordered by decreasing total relative
# abundance summed over all samples; used below to select the top families.
family_arrange <- family_summary %>%
    group_by(family) %>%
    summarise(total = sum(relabun)) %>% # ranking statistic is a sum across samples
    arrange(desc(total)) %>%
    pull(family)

# Per origin: jitter plot of the 20 top-ranked families, coloured by phylum,
# with one panel per environment. Zero abundances are not drawn.
family_summary %>%
    left_join(distinct(genome_metadata, family, phylum), by = join_by(family == family)) %>% # phylum of each family
    left_join(sample_metadata, by = join_by(sample == sample)) %>%
    filter(family %in% family_arrange[1:20]) %>%
    mutate(family = factor(family, levels = rev(family_arrange[1:20]))) %>% # top-ranked family ends up at the top
    filter(relabun > 0) %>%
    ggplot(aes(x = relabun, y = family, group = family, color = phylum)) +
        # NOTE(review): the 8th colour is dropped by position; presumably that
        # phylum is absent from the top 20 families -- confirm, since the index
        # silently breaks if phylum_colors is reordered.
        scale_color_manual(values = phylum_colors[-8]) +
        geom_jitter(alpha = 0.5) +
        facet_grid(. ~ environment) +
        theme_minimal() +
        labs(x = "Relative abundance", y = "Family", color = "Phylum")

5.2.2 Genus

# Relative abundance of each genus in each sample: one row per
# sample-phylum-genus combination, with genus names stripped of "g__".
genus_summary <- genome_counts_filt %>%
  mutate(across(-genome, ~ .x / sum(.x))) %>% # apply TSS normalisation
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% # reduce to minimum number of columns
  left_join(sample_metadata, by = join_by(sample == sample)) %>% # append sample metadata
  left_join(genome_metadata, by = join_by(genome == genome)) %>% # append genome metadata
  group_by(sample, phylum, genus) %>%
  summarise(relabun = sum(count), .groups = "drop") %>% # drop grouping so later steps start from an ungrouped table
  filter(genus != "g__") %>% # drop the bare "g__" label (presumably genomes without a genus assignment)
  mutate(genus = sub("^g__", "", genus)) # strip the leading "g__" prefix

# Mean and standard deviation of each genus's relative abundance across
# samples, sorted from highest to lowest mean.
genus_summary_sort <- genus_summary %>%
    group_by(genus) %>%
    summarise(mean = mean(relabun, na.rm = TRUE), sd = sd(relabun, na.rm = TRUE)) %>%
    arrange(desc(mean))

# Render the sorted summary as a table.
genus_summary_sort %>%
    tt()
tinytable_jhj6ju4891co4sjyph7d
genus mean sd
Bacteroides 2.435662e-01 0.1318569731
Parabacteroides 7.712066e-02 0.0442873518
Mucinivorans 5.485809e-02 0.0441493383
Odoribacter 3.998704e-02 0.0324395582
Akkermansia 3.699454e-02 0.0444888608
Aeromonas 3.001583e-02 0.0460225099
Parabacteroides_B 2.483548e-02 0.0277427000
Hafnia 2.227064e-02 0.1156606985
JADFUS01 1.771383e-02 0.0105985347
Alistipes 1.471418e-02 0.0109627462
Plesiomonas 1.454771e-02 0.0327940636
UBA866 1.419549e-02 0.0171150172
Clostridium 1.372645e-02 0.0226197767
Bilophila 1.331878e-02 0.0198622466
Acinetobacter 1.269634e-02 0.0263873382
14-2 1.233104e-02 0.0265894295
Clostridium_Q 1.171644e-02 0.0162699532
Mycoplasma_L 1.065393e-02 0.0394829437
Dielma 1.042251e-02 0.0127504085
CAJGBR01 9.607999e-03 0.0085952436
HGM05232 9.275306e-03 0.0103502184
Cetobacterium 7.826324e-03 0.0142283544
Gallibacteroides 7.802093e-03 0.0092742369
JAIHAL01 6.871180e-03 0.0126256687
Angelakisella 6.505557e-03 0.0057582329
Buttiauxella 6.133033e-03 0.0189800892
RGIG3102 5.734736e-03 0.0099264590
Hydrogenoanaerobacterium 5.697736e-03 0.0062384576
SZUA-378 4.928183e-03 0.0137349713
Pseudoflavonifractor 4.242479e-03 0.0043794439
Anaerotruncus 4.137290e-03 0.0042016918
Anaerovorax 4.013726e-03 0.0091630296
Butyricimonas 3.937359e-03 0.0043410385
UMGS1251 3.563237e-03 0.0054497977
Pseudomonas_E 3.189040e-03 0.0060054779
Sarcina 2.949596e-03 0.0057753291
Intestinimonas 2.941046e-03 0.0028260997
Mobilisporobacter 2.538844e-03 0.0044476714
Tidjanibacter 2.526410e-03 0.0022745134
Hungatella_A 2.509723e-03 0.0030300415
Bacteroides_G 2.482777e-03 0.0030685072
Egerieousia 2.456666e-03 0.0033990559
Malacoplasma 2.417336e-03 0.0054607581
Craterilacuibacter 2.301141e-03 0.0099058815
Alistipes_A 2.241040e-03 0.0019536428
Budvicia 2.215264e-03 0.0109761121
Avirikenella 1.958926e-03 0.0027352825
Paraclostridium 1.949494e-03 0.0097500062
Anaerorhabdus 1.842258e-03 0.0027674736
Serratia_A 1.746917e-03 0.0067047320
UMGS1202 1.735431e-03 0.0017442272
JAGAJR01 1.657831e-03 0.0037723464
Romboutsia_A 1.631884e-03 0.0027175524
Amedibacillus 1.617023e-03 0.0030514424
Gallalistipes 1.467926e-03 0.0012004170
JAAYQI01 1.419084e-03 0.0023350948
Intestinibacillus 1.312996e-03 0.0016471339
Phocea 1.264862e-03 0.0022185638
JAHHTP01 1.218787e-03 0.0014210421
Massiliimalia 1.160964e-03 0.0023829475
RGIG4140 1.144305e-03 0.0064919092
Aminipila 1.100200e-03 0.0023493631
Copranaerobaculum 1.063109e-03 0.0042027196
JAJBUQ01 1.044411e-03 0.0017072520
Romboutsia_D 1.035162e-03 0.0025533354
Shewanella 1.007172e-03 0.0039548332
Ruthenibacterium 1.000989e-03 0.0016556573
Deefgea 1.000738e-03 0.0024966652
Bradyrhizobium 9.310902e-04 0.0037227287
Coprobacillus 9.258861e-04 0.0021323691
Rikenella 8.870162e-04 0.0014086439
JAEZVV01 8.678700e-04 0.0024729975
RGIG7389 8.305951e-04 0.0010529671
JAGNZR01 7.450332e-04 0.0026332724
Kluyvera 7.354109e-04 0.0033265669
Bacilliculturomica 7.159179e-04 0.0013304547
Spyradomonas 7.146271e-04 0.0012823418
WRKB01 6.465109e-04 0.0015707534
Evtepia 6.395747e-04 0.0006982343
Anaerotignum 6.131710e-04 0.0012176488
Coprobacter 6.020864e-04 0.0009582824
MGBC133411 5.894437e-04 0.0009156972
Negativibacillus 5.804496e-04 0.0006835800
Robinsoniella 5.353474e-04 0.0010897149
IOR16 5.023546e-04 0.0007464032
Muricomes 4.994820e-04 0.0008485060
UBA7488 4.750299e-04 0.0009978028
RGIG8482 4.668877e-04 0.0025549082
CAKVBE01 4.333183e-04 0.0014714300
Citrobacter 4.112408e-04 0.0013787481
Faecalimonas 3.688513e-04 0.0008326873
Amedibacterium 3.478068e-04 0.0016606843
UBA1174 3.335192e-04 0.0019010146
SIG603 3.300671e-04 0.0005805146
Yersinia 3.292357e-04 0.0012233173
HGM16780 3.067475e-04 0.0013455312
Fimivivens 3.047193e-04 0.0004088726
UBA1794 3.009228e-04 0.0005607342
Longicatena 2.583333e-04 0.0015283205
Dysosmobacter 2.037202e-04 0.0003972865
JAGPHI01 1.805939e-04 0.0004150693
Enterococcus 1.502679e-04 0.0008889969
CALXSC01 1.224638e-04 0.0004012663
Hespellia 1.135437e-04 0.0002278677
Massilioclostridium 1.117290e-04 0.0002591622
51-20 9.051798e-05 0.0005355116
MGBC107952 7.705595e-05 0.0001929114
Scatenecus 7.620763e-05 0.0004062042
Cryptoclostridium 7.598288e-05 0.0001804883
Lactonifactor 6.094989e-05 0.0002283341
CAZU01 4.914752e-05 0.0002907606
# Character vector of genus names, ordered by decreasing total relative
# abundance summed over all samples.
# genus_summary already excludes the bare "g__" label and has the "g__"
# prefix stripped, so no further filtering or renaming is needed here.
genus_arrange <- genus_summary %>%
    group_by(genus) %>%
    summarise(total = sum(relabun)) %>% # ranking statistic is a sum across samples
    arrange(desc(total)) %>%
    pull(genus)

# Per environment: jitter plot of per-sample genus relative abundances,
# coloured by phylum, with one panel per environment. Genera are ordered by
# mean relative abundance (highest at the top); zero abundances are not drawn.
genus_summary %>%
    left_join(sample_metadata, by = join_by(sample == sample)) %>%
    mutate(genus = factor(genus, levels = rev(pull(genus_summary_sort, genus)))) %>%
    filter(relabun > 0) %>%
    ggplot(aes(x = relabun, y = genus, group = genus, color = phylum)) +
        scale_color_manual(values = phylum_colors) +
        geom_jitter(alpha = 0.5) +
        facet_grid(. ~ environment) +
        theme_minimal() +
        labs(y = "Genus", x = "Relative abundance", color = "Phylum") # y axis shows genera, not families